function [tdata, tdataflds, rejectcellnum]=data_tform(data,dataflds,lnglo,lnfov,zglo,zfov,filtdx)
% DATA_TFORM  Normalize and filter per-cell data derived from CellProfiler.
%
% Usage: [tdata, tdataflds, rejectcellnum]=data_tform(data,dataflds,lnglo,lnfov,zglo,zfov,filtdx);
%
% Rows of DATA correspond to individual cells and columns correspond to
% data classes, for example expression levels of different genes. The data
% field names must be passed in DATAFLDS, and one of the field names must
% include the string 'Object_Number' --- that column is interpreted as the
% cell index number. If several field names match, the first one is used
% and a warning is issued.
%
% Each column can be either log-normalized or Z-score normalized, and each
% column can be normalized over all of the cells in the data set (global),
% or on a per-field-of-view (fov) basis, i.e. all cells in an fov are
% normalized relative to each other.
%
% Inputs:
%   data     - numeric matrix, one row per cell, one column per field
%   dataflds - field names, one per column of data
%   lnglo    - columns to log-normalize globally
%   lnfov    - columns to log-normalize on a per-fov basis
%   zglo     - columns to zscore-normalize globally
%   zfov     - columns to zscore-normalize on a per-fov basis
%   filtdx   - vector of data columns used for cell filtering
%              (row or column vectors are accepted for all column lists;
%              an empty filtdx disables filtering)
%
% Outputs:
%   tdata         - normalized data with the rejected cells (rows) removed;
%                   the cell-number column holds the original cell numbers
%   tdataflds     - field names (as a column) with the DATATYPE1..DATATYPE4
%                   substrings removed
%   rejectcellnum - cell numbers of the cells removed by the filter
%
% Log normalization: with MaxVal the largest value in the peer group, each
% value x is mapped to log10(10^-RANGE + 10^RANGE * x/MaxVal). The cell
% with MaxVal is therefore assigned (approximately) RANGE, a value of 0 is
% assigned -RANGE, and values well below 10^-(2*RANGE)*MaxVal are
% indistinguishable from -RANGE. Thus RANGE sets the lower threshold at
% which a difference in an observable can be meaningfully distinguished.
% With RANGE=1, raw values far below 1/100th of MaxVal end up close to -1.
% A peer group whose maximum is 0 is not rescaled (avoids 0/0 = NaN).
%
% Z-score normalization: the peer group is z-scored and then rescaled so
% that the largest absolute value equals RANGE. A peer group without any
% spread (constant values, or a single cell) is left at 0 instead of NaN.
%
% MINMAXDATA is used to eliminate cells that have no meaningful data.
% Specifically, if the maximum normalized value for a cell (maximum over
% all of the columns specified in filtdx) is less than MINMAXDATA, then the
% cell is eliminated from the data set, and its cell number is recorded in
% the rejectcellnum list.
%
% BLOCKSIZE is the interval over which the cell numbers from each field of
% view are spaced (an integer multiple of BLOCKSIZE is added to the cell
% indices of each field of view, so as to distinguish which field each cell
% came from). Here a cell belongs to fov floor(cellnumber/BLOCKSIZE).
%
RANGE=1;
LOGFLOOR=10^-RANGE;   % additive floor inside the log (do not shadow builtin eps)
MINMAXDATA=-0.5;
BLOCKSIZE=1000;
%
tdataflds=dataflds;
% NOTE(review): DATATYPE1..DATATYPE4 are not defined in this file. They
% must resolve to something on the MATLAB path (or be defined here) for the
% next four lines to run -- confirm where they are supposed to come from.
tdataflds=replace(tdataflds(:),DATATYPE1,'');
tdataflds=replace(tdataflds(:),DATATYPE2,'');
tdataflds=replace(tdataflds(:),DATATYPE3,'');
tdataflds=replace(tdataflds(:),DATATYPE4,'');
%
% Locate the cell-number column.
cellobjdx=find(~cellfun('isempty',strfind(dataflds,'Object_Number')));
if isempty(cellobjdx)
    error('data_tform:noObjectNumber', ...
        'No field name contains the string ''Object_Number''.');
elseif numel(cellobjdx)>1
    warning('data_tform:multipleObjectNumber', ...
        '%d field names contain ''Object_Number''; using column %d.', ...
        numel(cellobjdx),cellobjdx(1));
    cellobjdx=cellobjdx(1);
end
cellobjnum=data(:,cellobjdx);
tdata=data;
%
%
%
% NORMALIZE DATA PER GENE AGGREGATED OVER ALL CELLS
% (:).' forces a row vector so that the for-loop visits one column index
% per iteration even when the caller passes a column vector.
% columns to be log normalized
for coldx=lnglo(:).'
    tdata(:,coldx)=tform_lognorm(tdata(:,coldx),LOGFLOOR);
end
% columns to be zscore normalized
for coldx=zglo(:).'
    tdata(:,coldx)=tform_znorm(tdata(:,coldx),RANGE);
end
%
%
%
% NORMALIZE DATA PER GENE ON A PER FIELD OF VIEW BASIS
% Every cell is assigned to exactly one fov; only fovs that actually
% contain cells are visited, so no peer group is ever empty.
fovid=floor(cellobjnum/BLOCKSIZE);
fovlist=unique(fovid(~isnan(fovid))).';
% columns to be log normalized
for coldx=lnfov(:).'
    for fovdx=fovlist
        inblock=(fovid==fovdx);
        tdata(inblock,coldx)=tform_lognorm(tdata(inblock,coldx),LOGFLOOR);
    end
end
% columns to be zscore normalized
for coldx=zfov(:).'
    for fovdx=fovlist
        inblock=(fovid==fovdx);
        tdata(inblock,coldx)=tform_znorm(tdata(inblock,coldx),RANGE);
    end
end
%
%
%
% ELIMINATE CELLS WITH LESS THAN MINMAX VALUE IN ALL OF THE FILTER COLUMNS
% Restore the raw cell numbers in case that column was normalized above.
tdata(:,cellobjdx)=cellobjnum;
if isempty(filtdx)
    % No filter columns given: keep every cell.
    keepcell=true(size(tdata,1),1);
else
    % max ignores NaN entries; a row that is NaN in every filter column
    % compares false and is rejected.
    keepcell=max(tdata(:,filtdx),[],2)>=MINMAXDATA;
end
rejectcellnum=cellobjnum(~keepcell);
tdata=tdata(keepcell,:);


function x=tform_lognorm(x,logfloor)
% Log-normalize one peer group (column vector): scale so that the maximum
% maps to 1/logfloor, then take log10(logfloor + x).
if isempty(x)
    return;
end
peak=max(x);
if peak~=0
    % Skipped when the maximum is 0 to avoid 0/0 = NaN.
    x=x/(logfloor*peak);
end
x=log10(logfloor+x);


function x=tform_znorm(x,range)
% Z-score one peer group (column vector) and rescale so that the largest
% absolute value equals range.
if isempty(x)
    return;
end
x=zscore(x);
peak=max(abs(x));
if peak>0
    % Skipped when the group has no spread (all z-scores are 0), which
    % would otherwise give 0/0 = NaN.
    x=range*x/peak;
end




